  if (!require("tidytext")) install.packages("tidytext"); require("tidytext")
  if (!require("dplyr")) install.packages("dplyr"); require("dplyr")
  if (!require("textdata")) install.packages("textdata"); require("textdata")
  if (!require("readstata13")) install.packages("readstata13"); require("readstata13")
  if (!require("ggplot2")) install.packages("ggplot2"); require("ggplot2")
  
  # Clean environment: remove every object from the current workspace
  # (attached packages stay loaded) and fix the RNG seed.
  rm(list = ls())
  set.seed(11111001)
  
  # ---- Define folders and relevant values ---- #
  # Detect system and define main folder accordingly.
  # Add one branch per user/machine that runs this script.
  if (Sys.info()[["user"]] == "mip694") {
    main_folder <- "C:/Users/mip694/Dropbox/Harvard/IGNITE"
  } else {
    # Fail here with an actionable message instead of the opaque
    # "object 'main_folder' not found" that would otherwise be raised below.
    stop(
      "No main folder is configured for user '", Sys.info()[["user"]],
      "'. Add a branch that sets `main_folder` for this machine.",
      call. = FALSE
    )
  }
  
  # -- Input and Output folders -- #
  # All paths below are derived from main_folder.
  input_data <- file.path(main_folder, "2input_data")
  kites_data <- file.path(input_data, "kites")
  output_data <- file.path(main_folder, "3output_data", "kites_temp")
  figures <- file.path(main_folder, "5figures")
  figures_wc <- file.path(figures, "wordcloud")
  
  # ---------------------------------------------------------------------------------- #
  # Tag every word of the message files with NRC sentiments and export the
  # result as CSV, one output file per (sample, time) combination.
  #
  # Input : <kites_data>/messages_<time>Sept2020<suffix>.dta
  # Output: <output_data>/sentiment_<time>Sept2020<suffix>.csv
  #
  # Samples (file-name suffix):
  #   ""        -> Overall
  #   "_14days" -> within 14 days before being released
  times <- c("before", "after")
  sample_suffixes <- c("", "_14days")
  
  # Get sentiments from the lexicon. The lexicon does not depend on the input
  # file, so it is loaded once instead of once per iteration.
  nrc_sentiments <- get_sentiments("nrc")
  
  # Samples are processed in the outer loop so files are read and written in
  # the order: overall/before, overall/after, 14days/before, 14days/after.
  for (sample_suffix in sample_suffixes) {
    for (time in times) {
      # text data
      name_file <- paste0("messages_", time, "Sept2020", sample_suffix, ".dta")
      text_data <- read.dta13(file.path(kites_data, name_file))
      
      # Unnest tokens (words): one row per lower-cased word of `response`
      tidy_data <- text_data %>%
        unnest_tokens(word, response, to_lower = TRUE)
      
      # Join the words with the sentiments. The key is stated explicitly so the
      # join cannot silently pick up other columns that happen to share a name
      # with the lexicon; words absent from the lexicon are dropped.
      sentiment_data <- tidy_data %>%
        inner_join(nrc_sentiments, by = "word")
      
      out_file <- paste0("sentiment_", time, "Sept2020", sample_suffix, ".csv")
      write.csv(sentiment_data, file = file.path(output_data, out_file))
    }
  }
  
  
  
  
  
  # # Count the number of each sentiment
  # sentiment_count <- sentiment_data %>%
  #   count(sentiment)
  # 
  # # Summarize sentiment for all words
  # summary_sentiment <- sentiment_count %>%
  #   group_by(sentiment) %>%
  #   summarize(total = sum(n))
  # 
  # # Share
  # total_sentiments <- sum(summary_sentiment$total)
  # summary_sentiment <- summary_sentiment %>%
  #   mutate(share = total / total_sentiments)
  # 
  # # Print the summary sentiment with share
  # print(summary_sentiment)
  
  ########################################
  ##  TARGETING SPECIFIC WORDS
  ########################################
  # Define the target words
  # target_words <- c("classes", "school", "ignite", "education")
  # 
  # # Sample text data
  # # text_data <- data.frame(
  # #   id = 1:3,
  # #   text = c("I love this product, it's amazing!", 
  # #            "The product is terrible, very disappointing.",
  # #            "The new product is okay, not great but not bad either.")
  # # )
  # 
  # # Tokenize the data into sentences
  # text_data_sentences <- text_data %>%
  #   unnest_tokens(sentence, response, token = "sentences", to_lower = T)  
  # 
  # # Function to check if sentence contains any of the target words
  # contains_target_word <- function(sentence, words) {
  #   any(sapply(words, function(word) grepl(word, sentence)))
  # }
  # 
  # # Filter sentences containing any of the target words
  # target_sentences <- text_data_sentences %>%
  #   filter(sapply(sentence, contains_target_word, words = target_words))
  # 
  # # Get sentiments from the lexicon
  # nrc_sentiments <- get_sentiments("nrc")
  # 
  # # Unnest tokens (words) in the filtered sentences
  # tidy_sentences <- target_sentences %>%
  #   unnest_tokens(word, sentence)
  # 
  # # Join the words with the sentiments
  # sentiment_data <- tidy_sentences %>%
  #   inner_join(nrc_sentiments)
  # 
  # # Count the number of each sentiment for each sentence
  # sentiment_count <- sentiment_data %>%
  #   group_by(request_number) %>%
  #   count(sentiment)
  # 
  # # Print the sentiment counts for each sentence
  # print(sentiment_count)
  # 
  # 
  # # Summarize sentiment for each target word
  # summary_sentiment <- sentiment_count %>%
  #   group_by(sentiment) %>%
  #   summarize(total = sum(n))
  # 
  # # Calculate the share of each sentiment
  # total_sentiments <- sum(summary_sentiment$total)
  # summary_sentiment <- summary_sentiment %>%
  #   mutate(share = total / total_sentiments)
  # 
  # # Print the summary sentiment with share
  # print(summary_sentiment)
  # 
  # 
  #   
  # 
  # # Convert sentiment to factor for ordered plotting
  # sentiment_count$sentiment <- factor(sentiment_count$sentiment, levels = c('negative', 'positive'))
  # 
  # # Create the bar plot
  # ggplot(sentiment_count, aes(fill = sentiment, y = n, x = as.factor(request_number))) +
  #   geom_bar(position = "stack", stat = "identity") +
  #   labs(x = "Sentence ID", y = "Count", fill = "Sentiment") +
  #   ggtitle("Sentiment Counts per Sentence") +
  #   theme_minimal()  
  

  
  
  

  
  
  
  
  
  
  
  
  